package com.dkdy.kaoyan.webmagic;

import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

/**
 * Small WebMagic demo crawler.
 *
 * <p>For every page it is handed, it prints each anchor's raw {@code href}
 * attribute together with the anchor's own text, followed by a marker line and
 * the URL of the page. It does not enqueue any further links, so only the
 * start URL given in {@link #main(String[])} is processed.
 */
public class Demo2 implements PageProcessor {
	/** Crawl configuration shared by all requests: retry times 3, sleep time 100. */
	private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

	/**
	 * Runs the spider against a fixed start URL using a single thread.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		String startUrl = "http://www.cnipr.com/";
		Spider.create(new Demo2()).addUrl(startUrl).thread(1).run();
	}

	/**
	 * Returns the crawl configuration used by this processor.
	 *
	 * @return the {@link Site} settings created when this instance was constructed
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Prints every anchor found on the page and then the page's URL.
	 *
	 * <p>Each anchor is written to standard output as
	 * {@code <href>-----<own text>}. The {@code href} value is printed exactly
	 * as it appears in the attribute; it is not resolved against the page URL.
	 *
	 * @param page the downloaded page to inspect
	 */
	@Override
	public void process(Page page) {
		Document document = page.getHtml().getDocument();
		Elements anchors = document.getElementsByTag("a");
		for (Element anchor : anchors) {
			String href = anchor.attr("href");
			String text = anchor.ownText();
			System.out.println(href + "-----" + text);
		}

		// Resolve the URL before printing the marker so the statement order
		// (and therefore the output on failure) stays the same as before.
		String pageUrl = page.getUrl().toString();
		System.out.println("xxx");
		System.out.println(pageUrl);
	}

}
